Add support for fast mov_to_kr privops
author: djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>
Sat, 8 Oct 2005 17:37:45 +0000 (11:37 -0600)
committer: djm@kirby.fc.hp.com <djm@kirby.fc.hp.com>
Sat, 8 Oct 2005 17:37:45 +0000 (11:37 -0600)
Signed-off-by: Dan Magenheimer <dan.magenheimer@hp.com>

linux-2.6-xen-sparse/arch/ia64/xen/hypercall.S
linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S
linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
xen/arch/ia64/asm-offsets.c
xen/arch/ia64/xen/hyperprivop.S
xen/arch/ia64/xen/privop.c

index 2cadc70a77374140fcd05b4d785534d15a79d849..6fb0a90520faff07ebcf2bb1ac02af190a0ada11 100644 (file)
@@ -202,6 +202,68 @@ GLOBAL_ENTRY(xen_set_rr)
        ;;
 END(xen_set_rr)
 
+// void xen_set_kr(unsigned long index, unsigned long val)
+// Set ar.k<index> (index 0..7): directly when not running on Xen,
+// else via the SET_KR hyperprivop (psr.ic shadow cleared around the break).
+GLOBAL_ENTRY(xen_set_kr)
+       movl r8=running_on_xen;;
+       ld4 r8=[r8];;
+       cmp.ne p7,p0=r8,r0;;
+(p7)   br.cond.spnt.few 1f;
+       ;;
+       // native path: the cmp/mov chain below expects index in r8, value in r9
+       mov r8=r32
+       mov r9=r33
+       ;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar0=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar1=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar2=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar3=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar4=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar5=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar6=r9
+(p7)   br.ret.sptk.many rp;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar7=r9
+(p7)   br.ret.sptk.many rp;;
+
+1:     movl r11=XSI_PSR_IC
+       mov r8=r32
+       mov r9=r33
+       ;;
+       ld8 r10=[r11]
+       ;;
+       st8 [r11]=r0
+       ;;
+       XEN_HYPER_SET_KR
+       ;;
+       st8 [r11]=r10
+       ;;
+       br.ret.sptk.many rp
+       ;;
+END(xen_set_kr)
+
 GLOBAL_ENTRY(xen_fc)
        movl r8=running_on_xen;;
        ld4 r8=[r8];;
index 18d3fb35ce1e9f59777030dbee2dcc4df7e5ffd6..a7b33a932f82637225c7e1849d0f5acaf5b4eb2a 100644 (file)
@@ -61,6 +61,12 @@ GLOBAL_ENTRY(ia64_switch_to)
        shr.u r26=r20,IA64_GRANULE_SHIFT
        cmp.eq p7,p6=r25,in0
        ;;
+#ifdef CONFIG_XEN
+       movl r8=XSI_PSR_IC
+       ;;
+       st4 [r8]=r0     // force psr.ic off for hyperprivop(s)
+       ;;
+#endif
        /*
         * If we've already mapped this task's page, we can skip doing it again.
         */
@@ -69,18 +75,24 @@ GLOBAL_ENTRY(ia64_switch_to)
        ;;
 .done:
 #ifdef CONFIG_XEN
+       // psr.ic already off
+       // update "current" application register
+       mov r8=IA64_KR_CURRENT
+       mov r9=in0;;
+       XEN_HYPER_SET_KR                // hyperprivop: ar.k[r8]=r9
+       ld8 sp=[r21]                    // load kernel stack pointer of new task
        movl r27=XSI_PSR_IC
        mov r8=1
        ;;
-(p6)   st4 [r27]=r8
+       st4 [r27]=r8                    // psr.ic back on
        ;;
 #else
 (p6)   ssm psr.ic                      // if we had to map, reenable the psr.ic bit FIRST!!!
        ;;
 (p6)   srlz.d
-#endif
        ld8 sp=[r21]                    // load kernel stack pointer of new task
        mov IA64_KR(CURRENT)=in0        // update "current" application register
+#endif
        mov r8=r13                      // return pointer to previously running task
        mov r13=in0                     // set "current" pointer
        ;;
@@ -93,9 +105,7 @@ GLOBAL_ENTRY(ia64_switch_to)
 
 .map:
 #ifdef CONFIG_XEN
-       movl r27=XSI_PSR_IC
-       ;;
-       st4 [r27]=r0
+       // psr.ic already off
 #else
        rsm psr.ic                      // interrupts (psr.i) are already disabled here
 #endif
@@ -115,13 +125,17 @@ GLOBAL_ENTRY(ia64_switch_to)
        st8 [r8]=in0                     // VA of next task...
        ;;
        mov r25=IA64_TR_CURRENT_STACK
+       // remember last page we mapped...
+       mov r8=IA64_KR_CURRENT_STACK
+       mov r9=r26;;
+       XEN_HYPER_SET_KR;;              // hyperprivop: ar.k[r8]=r9
 #else
        mov cr.itir=r25
        mov cr.ifa=in0                  // VA of next task...
        ;;
        mov r25=IA64_TR_CURRENT_STACK
-#endif
        mov IA64_KR(CURRENT_STACK)=r26  // remember last page we mapped...
+#endif
        ;;
        itr.d dtr[r25]=r23              // wire in new mapping...
        br.cond.sptk .done
index 3d245fff8c38f30deeecfdd2cec7b9ee1ddda4ea..e746cd93b4146aa4e40f4b51cfd4d7bc9a75ccfe 100644 (file)
@@ -32,6 +32,7 @@
 #define        XEN_HYPER_ITR_D                 break 0xf
 #define        XEN_HYPER_GET_RR                break 0x10
 #define        XEN_HYPER_SET_RR                break 0x11
+#define        XEN_HYPER_SET_KR                break 0x12
 #endif
 
 #ifndef __ASSEMBLY__
@@ -93,9 +94,6 @@ extern void xen_set_eflag(unsigned long);     /* see xen_ia64_setreg */
        XEN_HYPER_SSM_I;                                                \
 })
 
-// for now, just use privop.  may use hyperprivop later
-/*#define xen_set_kr(regnum,val) (__ia64_setreg(regnum,val)) */
-
 /* turning off interrupts can be paravirtualized simply by writing
  * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */
 #define xen_rsm_i()    xen_set_virtual_psr_i(0)
@@ -157,6 +155,7 @@ extern void xen_set_tpr(unsigned long);
 extern void xen_eoi(void);
 extern void xen_set_rr(unsigned long index, unsigned long val);
 extern unsigned long xen_get_rr(unsigned long index);
+extern void xen_set_kr(unsigned long index, unsigned long val); /* ar.k<index> = val */
 
 /* Note: It may look wrong to test for running_on_xen in each case.
  * However regnum is always a constant so, as written, the compiler
@@ -193,9 +192,8 @@ extern unsigned long xen_get_rr(unsigned long index);
 ({                                                                     \
        switch(regnum) {                                                \
        case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7:                     \
-/* for now, just use privop.  may use hyperprivop later */             \
-/*             (running_on_xen) ?                                      \
-                       xen_set_kr((regnum-_IA64_REG_AR_KR0), val) : */ \
+               (running_on_xen) ?                                      \
+                       xen_set_kr((regnum-_IA64_REG_AR_KR0), val) :    \
                        __ia64_setreg(regnum,val);                      \
                break;                                                  \
        case _IA64_REG_CR_ITM:                                          \
index c28d762aba75aa4dacf30dc7251226a502fb9f20..d1a6893fdd071f6498529e43f2475f47207a7e25 100644 (file)
@@ -69,6 +69,7 @@ void foo(void)
        DEFINE(XSI_TPR_OFS, offsetof(mapped_regs_t, tpr));
        DEFINE(XSI_PTA_OFS, offsetof(mapped_regs_t, pta));
        DEFINE(XSI_ITV_OFS, offsetof(mapped_regs_t, itv));
+       DEFINE(XSI_KR0_OFS, offsetof(mapped_regs_t, krs[0]));  // base of ar.k0-k7 shadow array
        //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
        //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
        //DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
index cb314386fbee72a5b40e6bd8cb17b04d4a9fda90..cddfba7f69ea73abf809cf8116ecc9a107fafa5d 100644 (file)
@@ -44,6 +44,7 @@
 #define    XEN_HYPER_ITR_D         0xf
 #define    XEN_HYPER_GET_RR        0x10
 #define    XEN_HYPER_SET_RR        0x11
+#define    XEN_HYPER_SET_KR        0x12
 
 #ifdef CONFIG_SMP
 #warning "FIXME: ptc.ga instruction requires spinlock for SMP"
@@ -169,6 +170,10 @@ GLOBAL_ENTRY(fast_hyperprivop)
        cmp.eq p7,p6=XEN_HYPER_THASH,r17
 (p7)   br.sptk.many hyper_thash;;
 
+       // HYPERPRIVOP_SET_KR?
+       cmp.eq p7,p6=XEN_HYPER_SET_KR,r17
+(p7)   br.sptk.many hyper_set_kr;;
+
        // if not one of the above, give up for now and do it the slow way
        br.sptk.many dispatch_break_fault ;;
 
@@ -1459,6 +1464,62 @@ ENTRY(hyper_set_rr)
        ;;
 END(hyper_set_rr)
 
+ENTRY(hyper_set_kr)                    // in: r8 = kr#, r9 = new value
+       extr.u r25=r8,3,61;;
+       cmp.ne p7,p0=r0,r25     // if kr# > 7, go slow way
+(p7)   br.spnt.many dispatch_break_fault ;;
+#ifdef FAST_HYPERPRIVOP_CNT
+       movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_KR);;
+       ld8 r21=[r20];;
+       adds r21=1,r21;;
+       st8 [r20]=r21;;
+#endif
+       adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18 ;;      // r21 = &shadow krs[0]
+       shl r20=r8,3;;                                  // r20 = kr# * 8
+       add r22=r20,r21;;                               // r22 = &shadow krs[kr#]
+       st8 [r22]=r9;;          // store to the selected slot (was [r21]: always krs[0])
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar0=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar1=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar2=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar3=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar4=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar5=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar6=r9;;
+       cmp.eq p7,p0=r8,r0
+       adds r8=-1,r8;;
+(p7)   mov ar7=r9;;
+       // done, mosey on back
+1:     mov r24=cr.ipsr
+       mov r25=cr.iip;;
+       extr.u r26=r24,41,2 ;;          // r26 = ipsr.ri (bundle slot)
+       cmp.eq p6,p7=2,r26 ;;
+(p6)   mov r26=0
+(p6)   adds r25=16,r25
+(p7)   adds r26=1,r26
+       ;;
+       dep r24=r26,r24,41,2            // advance iip/ri past the break
+       ;;
+       mov cr.ipsr=r24
+       mov cr.iip=r25
+       mov pr=r31,-1 ;;
+       rfi
+       ;;
+END(hyper_set_kr)
+
 // this routine was derived from optimized assembly output from
 // vcpu_thash so it is dense and difficult to read but it works
 // On entry:
index def644c30e691013f78d0d775c9d16fcb0c09402..ed50eb0d2f5fc9ed738b926721ddc3026c99bceb 100644 (file)
@@ -757,12 +757,13 @@ priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr)
 #define HYPERPRIVOP_ITR_D              0xf
 #define HYPERPRIVOP_GET_RR             0x10
 #define HYPERPRIVOP_SET_RR             0x11
-#define HYPERPRIVOP_MAX                        0x11
+#define HYPERPRIVOP_SET_KR             0x12
+#define HYPERPRIVOP_MAX                        0x12
 
 char *hyperpriv_str[HYPERPRIVOP_MAX+1] = {
        0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i",
        "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d",
-       "=rr", "rr=",
+       "=rr", "rr=", "kr=",
        0
 };
 
@@ -848,6 +849,9 @@ ia64_hyperprivop(unsigned long iim, REGS *regs)
            case HYPERPRIVOP_SET_RR:
                (void)vcpu_set_rr(v,regs->r8,regs->r9);
                return 1;
+           case HYPERPRIVOP_SET_KR:
+               (void)vcpu_set_ar(v,regs->r8,regs->r9); /* kr# == ar reg# for ar.k0-k7 */
+               return 1;
        }
        return 0;
 }